library(haven)
gcse <- read_dta("data/gcse.dta")
library(ggplot2)
ggplot(data = gcse, mapping = aes(x = lrt, y = gcse)) +
facet_wrap(~ school, ncol = 5) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
labs(caption = "grouped by school")
Alternatively, we can randomly select 25 schools to display, and add a yellow line to show the overall regression line for the full data set to each facet.
library(dplyr)
num_schools <- 25
filter_gcse <- gcse %>%
filter(school %in% sample(unique(school), size = num_schools,
replace = FALSE))
ggplot(data = filter_gcse, mapping = aes(x = lrt, y = gcse)) +
facet_wrap(~ school, ncol = 5) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
geom_smooth(data = dplyr::select(gcse, -school), method = "lm",
color = "yellow") +
labs(caption = "grouped by school")
First we generate some longitudinal data (adapted from here).
library(MASS)
library(nlme)
library(purrr)
### set number of individuals
n <- 200
### average intercept and slope
beta0 <- 1.0
beta1 <- 6.0
### true autocorrelation
ar.val <- .4
### true error SD, intercept SD, slope SD, and intercept-slope cor
sigma <- 1.5
tau0 <- 2.5
tau1 <- 2.0
tau01 <- 0.3
### maximum number of possible observations
m <- 10
### simulate number of observations for each individual
p <- round(runif(n, 4, m))
### set up data frame
dat <- map2_df(.x = 1:n, .y = p, .f = function(x, y, m) {
data.frame(
id = rep(x, times = y),
group = factor(rbinom(n = 1, size = 3, prob = 0.5), levels = 0:3),
obs = c(1, sort(sample(2:m, y - 1, replace = FALSE)))
)
}, m = m)
### simulate (correlated) random effects for intercepts and slopes
mu <- c(0, 0)
S <- matrix(c(1, tau01, tau01, 1), nrow = 2)
tau <- c(tau0, tau1)
S <- diag(tau) %*% S %*% diag(tau)
U <- mvrnorm(n, mu=mu, Sigma=S)
### simulate AR(1) errors and then the actual outcomes
dat$e <- unlist(sapply(p, function(x) arima.sim(model=list(ar=ar.val), n=x) * sqrt(1-ar.val^2) * sigma))
dat$y <- (beta0 + rep(U[,1], times = p)) + (beta1 + rep(U[, 2], times = p)) * log(dat$obs) + dat$e
First we create a blank template plot with some formatting
p <- ggplot(data = dat, mapping = aes(x = obs, y = y)) +
scale_x_continuous(limits = c(1, 10), breaks = seq(1, 10, 1)) +
theme_bw()
p
Now, we create our initial scatter plot.
p + geom_point()
Draw a line for each individual by assigning group to the person identifier.
p + geom_point() +
geom_line(aes(group = id))
Remove the points and add an overall loess trend line.
p + geom_line(aes(group = id)) +
geom_smooth(method = "loess", color = "red", se = FALSE)
Split out by group, drawing a trend line for each group individually.
p + geom_line(aes(group = id)) +
geom_smooth(method = "loess", color = "red", se = FALSE) +
facet_wrap(~ group)
We can add the overall trend line by adding a second geom_smooth layer that uses the same data, but without the facetting variable.
p + geom_line(aes(group = id)) +
geom_smooth(method = "loess", color = "red", se = FALSE) +
geom_smooth(data = dplyr::select(dat, -group), method = "loess", color = "blue",
se = FALSE) +
facet_wrap(~ group)
Finally, add a color aesthetic so a legend will be created, remove the space between facet panels, and remove grid lines. We can also save the plot using ggsave.
p <- p + geom_line(aes(group = id)) +
geom_smooth(aes(color = "Group Trend"), method = "loess", se = FALSE) +
geom_smooth(data = dplyr::select(dat, -group), aes(color = "Overall Trend"),
method = "loess", se = FALSE) +
facet_wrap(~ group) +
scale_color_manual(name = NULL, values = c("red", "blue")) +
theme(
legend.position = "bottom",
panel.spacing = unit(0, "lines"),
panel.grid = element_blank()
)
ggsave("saved_p.png", p, width = 12, height = 8, units = "in")
# can also print to screen
p